/*
	Cleaning file for Study 1 experiments.
	Code provided for reference. Raw data is not provided to preserve
	participant anonymity.

*/


//------------------------------------------------------------------------------ Prep raw data
	
// Prolific (Asians): import the raw export and strip survey metadata
	// NOTE(review): the input file is named study2_* while the file header
	// describes Study 1 -- confirm this is the intended raw file.
	insheet using study2_prolific_raw.csv, comma name clear

	// Discard the first two data rows.
	// NOTE(review): presumably leftover header/label rows from the export -- confirm.
	drop in 1/2

	// Speed flag: 1 when duration/60 (minutes, rounded to 0.1) is below 2.
	// Only this lower bound is applied; there is no upper bound on time.
	gen exflag_timeinvalid = round(duration/60, .1) < 2
	drop if progress < 90

	// When zip is not numeric, replace it with a numeric variable zipcode.
	// NOTE(review): a zip that is already numeric keeps the name zip, so the
	// resulting variable name differs between the two cases -- confirm.
	cap confirm numeric variable zip
	if _rc {
		gen zipcode = real(zip)
		drop zip
	}

	// Remove preview responses, then the bookkeeping columns.
	drop if status == "Survey Preview"
	drop status ipaddress progress duration finished recorded response ///
		recipient* external location* distribution userlanguage ///
		consent age18 feedback mturkcode fl_*

	// Blank out the "-99" code in every string variable.
	ds, has(type string)
	local strvars `r(varlist)'
	foreach v of local strvars {
		replace `v' = "" if `v' == "-99"
	}

	gen source = "prolific"

//------------------------------------------------------ PRE TREATMENT
/*
	Demographics

	Indicators built from the raw string answers:
	  r_identify_asian   1 if race is "Asian"
	  r_age              2020 minus the numeric value of byear
	  r_male, r_female   1 if gender is "Male" / "Female"
	  r_asian            1 if r_identify_asian is 1 or notasian_check is "Yes"
	race is also recoded so "Black/African-American" becomes "Black".
*/
	gen r_identify_asian = race == "Asian" // Did R self-identify as Asian?
	gen r_age = 2020 - real(byear)
	gen r_male = gender == "Male"
	gen r_female = gender == "Female"
	replace race = "Black" if race == "Black/African-American"
	
	// Use the full variable name: the abbreviation r_identify only resolves
	// while variable abbreviation is on and no other r_identify* variable exists.
	gen r_asian = r_identify_asian == 1 | notasian_check == "Yes"

	// Multiple countries indicator
	// One cntry_<name> flag per country_a_<col> checkbox: 1 when the checkbox
	// holds the expected label, missing otherwise. The three lists are parallel
	// (note the last checkbox is column 19, not 16).
	local cntry_vars bangladesh cambodia china taiwan india indonesia japan korea laos malaysia myanmar philippines singapore thailand vietnam pakistan
	local cntry_cols 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 19
	local cntry_labs `" "Bangladesh" "Cambodia" "China" "Taiwan" "India" "Indonesia" "Japan" "Korea" "Laos" "Malaysia" "Myanmar" "The Philippines" "Singapore" "Thailand" "Vietnam" "Pakistan" "'

	forvalues i = 1/16 {
		local nm : word `i' of `cntry_vars'
		local col : word `i' of `cntry_cols'
		local lab : word `i' of `cntry_labs'
		gen cntry_`nm' = 1 if country_a_`col' == "`lab'"
	}

	// Number of countries ticked (rowtotal counts missing flags as 0).
	egen cntry_num = rowtotal(cntry_*)
	

// Generation - NOTE: might need to add a question about citizenship if I'm screening on it
	// encode assigns integer codes to the distinct answers in gen (alphabetical
	// order). Codes 3 and 4 collapse into generation 1, code 2 maps to 2 and
	// code 1 maps to 3; anything else stays missing.
	// NOTE(review): the answer text behind each code is not visible here --
	// confirm the mapping against the raw response labels.
	cap drop gencat
	cap drop generation
	encode gen, gen(gencat) lab(gencat_lbl)
	gen generation = cond(inlist(gencat, 3, 4), 1, ///
		cond(gencat == 2, 2, ///
		cond(gencat == 1, 3, .)))

	// The scratch coding and its value label are not kept.
	label drop gencat_lbl
	drop gencat


// High Low Identifiers
	// Score the four-category importance item on a 0-1 scale; answers that
	// match none of the labels stay missing. The two lists are parallel.
	local imp_scores 0 0.333 0.667 1
	local imp_labels `" "Not at all important" "Somewhat important" "Important" "Very important" "'

	gen imp_asian = .
	forvalues k = 1/4 {
		local score : word `k' of `imp_scores'
		local lab : word `k' of `imp_labels'
		replace imp_asian = `score' if idimp_asian == "`lab'"
	}
	label var imp_asian "Importance of being Asian (0-1)"

	// Split at 0.5. A missing score gives 0 on both flags: the strong flag
	// excludes it explicitly, and a missing value is never below 0.5.
	gen idstr_strong = !missing(imp_asian) & imp_asian > 0.5
	gen idstr_weak = imp_asian < 0.5
	
// PID
	// Party indicators from pid, with the *_lean versions also counting
	// respondents whose pid_lean answer points to that party.
	gen pid_dem = (pid == "Democrat")
	gen pid_dem_lean = (pid == "Democrat") | (pid_lean == "Democratic")
	gen pid_gop = (pid == "Republican")
	gen pid_gop_lean = (pid == "Republican") | (pid_lean == "Republican")
	gen pid_ind = (pid == "Independent")
	gen pid_ind_pure = (pid == "Independent") & (pid_lean == "Neither")

	// Seven-point ideology, scored -3 (extremely liberal) to 3 (extremely
	// conservative); the labels are listed in scale order.
	local ideo_labels `" "Extremely liberal" "Liberal" "Slightly liberal" "Moderate/Middle of road" "Slightly conservative" "Conservative" "Extremely conservative" "'

	gen pol_ideo_7 = .
	local score = -3
	foreach lab of local ideo_labels {
		replace pol_ideo_7 = `score' if ideology == "`lab'"
		local ++score
	}
	
// Attitudes toward Harris
	// Five-point agreement items: each string answer is replaced by a 0-1
	// score in steps of 0.25, under the same variable name.
	local agree_labels `" "Strongly disagree" "Somewhat disagree" "Neither agree nor disagree" "Somewhat agree" "Strongly agree" "'

	foreach item in repasam repblack repmy repmycomm {
		// Park the string answer under a scratch name so the numeric
		// version can take over the original name.
		rename harris_`item' answer_txt
		gen harris_`item' = .
		local score = 0
		foreach lab of local agree_labels {
			replace harris_`item' = `score' if answer_txt == "`lab'"
			local score = `score' + 0.25
		}
		drop answer_txt
	}

	// Favorability uses the same 0-1 scoring but is stored as harris_favor;
	// the string harris_favorability is dropped.
	local favor_labels `" "Extremely unfavorable" "Somewhat unfavorable" "Neither favorable nor unfavorable" "Somewhat favorable" "Extremely favorable" "'

	gen harris_favor = .
	local score = 0
	foreach lab of local favor_labels {
		replace harris_favor = `score' if harris_favorability == "`lab'"
		local score = `score' + 0.25
	}
	drop harris_favorability
	
	
// Asianness of individuals
	// rate_<person>: 0-1 score of the howasian_<person> answer. The top
	// category is detected by the substring "Who" rather than an exact match;
	// answers matching nothing stay missing.
	foreach person in harris yang duckworth jindal omar {
		local ans howasian_`person'
		gen rate_`person' = cond(`ans' == "Not at all Asian", 0, ///
			cond(`ans' == "A little Asian", 0.25, ///
			cond(`ans' == "Somewhat Asian", 0.50, ///
			cond(`ans' == "A typical Asian", 0.75, ///
			cond(strpos(`ans', "Who") > 0, 1, .)))))
	}
	
// Asianness of Groups
	// Item number -> group name; the position in this list is the item number.
	local groups chinese japanese korean indian filipino vietnamese pakistani singaporean thai malaysian bangladeshi hmong indonesian laotian cambodian british australian russian iranian
	local rep_labels `" "Not at all representative" "A little representative" "Somewhat representative" "Representative" "Extremely representative" "'

	forvalues gr = 1/19 {
		local group : word `gr' of `groups'

		// rate_<group>: representativeness wording (howasian_groups_#), 0-1.
		gen rate_`group' = .
		local score = 0
		foreach lab of local rep_labels {
			replace rate_`group' = `score' if howasian_groups_`gr' == "`lab'"
			local score = `score' + 0.25
		}

		// alt_<group>: "how Asian" wording (howasian_alt_#), 0-1; the top
		// category is detected by the substring "Who".
		local ans howasian_alt_`gr'
		gen alt_`group' = cond(`ans' == "Not at all Asian", 0, ///
			cond(`ans' == "A little Asian", 0.25, ///
			cond(`ans' == "Somewhat Asian", 0.5, ///
			cond(`ans' == "A typical Asian", 0.75, ///
			cond(strpos(`ans', "Who") > 0, 1, .)))))
	}

	// Items 1-4 of howasian_multi_# / howasian_altfam_# are stored as
	// rate_<name> and alt_<name> for multi, halfwhite, halfblack and adopted,
	// both on a 0-1 scale in steps of 0.25.
	forvalues gr = 1/4 {
		local group : word `gr' of multi halfwhite halfblack adopted
	
		gen rate_`group' = 0 if howasian_multi_`gr' == "Not at all representative"
		replace rate_`group' = 0.25 if howasian_multi_`gr' == "A little representative"
		replace rate_`group' = 0.50 if howasian_multi_`gr' == "Somewhat representative"
		replace rate_`group' = 0.75 if howasian_multi_`gr' == "Representative"
		// The top category was previously matched only as "Extemely
		// representative". Accept the correct spelling too, so the score can
		// reach 1 whichever spelling the raw data carries.
		replace rate_`group' = 1 if inlist(howasian_multi_`gr', "Extremely representative", "Extemely representative")
		
		gen alt_`group' = 0 if howasian_altfam_`gr' == "Not at all Asian"
		replace alt_`group' = 0.25 if howasian_altfam_`gr' == "A little Asian"
		replace alt_`group' = 0.5 if howasian_altfam_`gr' == "Somewhat Asian"
		replace alt_`group' = 0.75 if howasian_altfam_`gr' == "A typical Asian"
		// Top category is detected by the substring "Who" rather than an exact match.
		replace alt_`group' = 1 if strpos(howasian_altfam_`gr', "Who") != 0
	}

	// Attention Check (howasian_alt_20, howasian_groups_20)
	// The item that is checked depends on measure: 0 -> howasian_alt_20 must be
	// "Very Asian", 1 -> howasian_groups_20 must be "Representative".
	// acq_pass stays missing for any other value of measure.
	// NOTE(review): "Very Asian" is not one of the answer labels recoded for the
	// other howasian_alt_* items -- confirm it is the expected response.
	gen acq_pass = cond(measure == 0, howasian_alt_20 == "Very Asian", ///
		cond(measure == 1, howasian_groups_20 == "Representative", .))

	// Drop the raw howasian_* and country_* columns.
	drop howasian_* country_*
	
	// Candidate Conditions
	// duck is 1 for the duck1/duck2 arms; harris is 1 for every other value of
	// treatment.
	// NOTE(review): a blank treatment therefore also gives harris == 1 --
	// confirm that is intended.
	gen duck = inlist(treatment, "duck1", "duck2")
	gen harris = !inlist(treatment, "duck1", "duck2")
	

	// Treatment
	// One 0/1 indicator per arm. A logical expression in gen always evaluates
	// to 0 or 1, so these indicators are never missing and need no backfill.
	foreach arm in asian control indian black multi {
		gen tr_`arm' = treatment == "`arm'"
	}
	gen tr_duckt = treatment == "duck1"
	gen tr_duckc = treatment == "duck2"
	
	// Recode representation variables
	// For the duck1/duck2 arms, copy each harris_* score into a matching
	// duckworth_* variable and then blank the harris_* value, so each
	// respondent has scores under only one of the two prefixes.
	local in_duck_arm inlist(treatment, "duck1", "duck2")
	foreach item in repasam repblack repmy repmycomm favor {
		gen duckworth_`item' = harris_`item' if `in_duck_arm'
		replace harris_`item' = . if `in_duck_arm'
	}


	// Write the cleaned dataset, overwriting any existing copy.
	save data_study1.dta, replace
